/**
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional information regarding
 * copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License. You may obtain a
 * copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package edu.uci.ics.crawler4j.crawler;

import java.util.Map;

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.util.EntityUtils;

import edu.uci.ics.crawler4j.parser.ParseData;
import edu.uci.ics.crawler4j.url.WebURL;

/**
 * This class contains the data for a fetched and parsed page.
 * 
 * @author Yasser Ganjisaffar <lastname at gmail dot com>
 */
public class Page
{

    /**
     * Name of the HTTP response header carrying the last modification date.
     */
    private static final String LAST_MODIFIED_HEADER = "Last-Modified";

    /**
     * The URL of this page.
     */
    protected WebURL url;

    /**
     * The content of this page in binary format.
     */
    protected byte[] contentData;

    /**
     * The ContentType of this page. For example: "text/html; charset=UTF-8"
     */
    protected String contentType;

    /**
     * The encoding of the content. For example: "gzip"
     */
    protected String contentEncoding;

    /**
     * The charset of the content. For example: "UTF-8"
     */
    protected String contentCharset;

    /**
     * The value of the Last-Modified header, or null if it was not supplied.
     */
    protected String lastModified;

    /**
     * The parsed data populated by parsers
     */
    protected ParseData parseData;

    /**
     * Creates a page for the given URL. No content is loaded until
     * {@link #load(HttpEntity, Map)} is called.
     * 
     * @param url the URL of this page
     */
    public Page(WebURL url)
    {
        this.url = url;
    }

    /**
     * Returns the URL of this page.
     */
    public WebURL getWebURL()
    {
        return url;
    }

    /**
     * Sets the URL of this page.
     */
    public void setWebURL(WebURL url)
    {
        this.url = url;
    }

    /**
     * Loads the content of this page from a fetched HttpEntity.
     * <p>
     * The content type, content encoding, charset and last-modified values are all overwritten by
     * this call, so values from an earlier load never survive into the next one. The body is read
     * last; if reading it fails, the metadata fields have already been updated.
     * 
     * @param entity the fetched entity; must not be null
     * @param headers the response headers as name/value pairs; may be null, in which case
     *            {@link #getLastModified()} returns null afterwards
     * @throws Exception if the entity content cannot be read
     */
    public void load(HttpEntity entity, Map<String, String> headers) throws Exception
    {
        Header type = entity.getContentType();
        contentType = (type != null) ? type.getValue() : null;

        Header encoding = entity.getContentEncoding();
        contentEncoding = (encoding != null) ? encoding.getValue() : null;

        // Always reassigned (possibly to null) so that a stale value from a previous load is
        // not kept when no headers are supplied.
        lastModified = findHeaderIgnoreCase(headers, LAST_MODIFIED_HEADER);

        // NOTE: an earlier attempt to derive the charset through ContentType.getOrDefault(entity)
        // was reported to cause a NullPointerException (FIXME 20130314), so the EntityUtils
        // helper is still used here.
        contentCharset = EntityUtils.getContentCharSet(entity);

        contentData = EntityUtils.toByteArray(entity);
    }

    /**
     * Looks up a header value, preferring an exact key match and falling back to a
     * case-insensitive scan because HTTP header names are case-insensitive.
     * 
     * @param headers the header map to search; may be null
     * @param name the header name to look for
     * @return the header value, or null if the map is null or holds no matching entry
     */
    private static String findHeaderIgnoreCase(Map<String, String> headers, String name)
    {
        if (headers == null) {
            return null;
        }
        String exact = headers.get(name);
        if (exact != null) {
            return exact;
        }
        for (Map.Entry<String, String> entry : headers.entrySet()) {
            String key = entry.getKey();
            if (key != null && key.equalsIgnoreCase(name)) {
                return entry.getValue();
            }
        }
        return null;
    }

    /**
     * Returns the parsed data generated for this page by parsers
     */
    public ParseData getParseData()
    {
        return parseData;
    }

    /**
     * Sets the parsed data for this page.
     */
    public void setParseData(ParseData parseData)
    {
        this.parseData = parseData;
    }

    /**
     * Returns the content of this page in binary format. The internal array is returned as is,
     * without copying.
     */
    public byte[] getContentData()
    {
        return contentData;
    }

    /**
     * Sets the content of this page in binary format. The given array is stored as is, without
     * copying.
     */
    public void setContentData(byte[] contentData)
    {
        this.contentData = contentData;
    }

    /**
     * Returns the ContentType of this page. For example: "text/html; charset=UTF-8"
     */
    public String getContentType()
    {
        return contentType;
    }

    /**
     * Sets the ContentType of this page.
     */
    public void setContentType(String contentType)
    {
        this.contentType = contentType;
    }

    /**
     * Returns the encoding of the content. For example: "gzip"
     */
    public String getContentEncoding()
    {
        return contentEncoding;
    }

    /**
     * Sets the encoding of the content.
     */
    public void setContentEncoding(String contentEncoding)
    {
        this.contentEncoding = contentEncoding;
    }

    /**
     * Returns the charset of the content. For example: "UTF-8"
     */
    public String getContentCharset()
    {
        return contentCharset;
    }

    /**
     * Sets the charset of the content.
     */
    public void setContentCharset(String contentCharset)
    {
        this.contentCharset = contentCharset;
    }

    /**
     * Returns the value of the Last-Modified header, or null if none was supplied.
     */
    public String getLastModified()
    {
        return lastModified;
    }

    /**
     * Sets the value of the Last-Modified header.
     */
    public void setLastModified(String lastModified)
    {
        this.lastModified = lastModified;
    }

    /**
     * Returns a short description of this page. The binary content and the parsed data are only
     * reported as present or absent, not printed.
     */
    @Override
    public String toString()
    {
        StringBuilder builder = new StringBuilder();
        builder.append("Page [url=").append(url);
        builder.append(", contentData?=").append(contentData != null);
        builder.append(", contentType=").append(contentType);
        builder.append(", contentEncoding=").append(contentEncoding);
        builder.append(", contentCharset=").append(contentCharset);
        builder.append(", parseData?=").append(parseData != null);
        builder.append(", lastModified=").append(lastModified);
        builder.append("]");
        return builder.toString();
    }
}
